# Launch the UnifiedReward vLLM server as a background job.
#
# `vllm serve` is run under nohup and backgrounded with `&`; its stdout and
# stderr are both redirected to the log file, so nothing is printed here.
# PYTORCH_HIP_ALLOC_CONF is set for this single command only (presumably a
# PyTorch ROCm/HIP allocator setting -- confirm for your hardware).
#
# Optional environment overrides; when unset or empty, the defaults below
# reproduce the previously hard-coded invocation exactly:
#   REWARD_MODEL     model passed to `vllm serve`
#                    (default: CodeGoat24/UnifiedReward-qwen-7b)
#   REWARD_GPU_UTIL  value for --gpu-memory-utilization (default: 0.85)
#   REWARD_TP_SIZE   value for --tensor-parallel-size   (default: 4)
#   REWARD_PORT      value for --port                   (default: 8080)
#   REWARD_LOG       file receiving stdout+stderr       (default: vllm.log)
#
# Example:
#   REWARD_MODEL=CodeGoat24/UnifiedReward-7b-v1.5 REWARD_PORT=8081 sh <this-script>
#
# NOTE(review): --limit-mm-per-prompt is given in key=value form; some vLLM
# releases may expect a JSON value instead -- confirm against the installed
# version before upgrading.
PYTORCH_HIP_ALLOC_CONF=expandable_segments:True \
nohup vllm serve "${REWARD_MODEL:-CodeGoat24/UnifiedReward-qwen-7b}" \
    --host 0.0.0.0 \
    --trust-remote-code \
    --served-model-name UnifiedReward \
    --gpu-memory-utilization "${REWARD_GPU_UTIL:-0.85}" \
    --tensor-parallel-size "${REWARD_TP_SIZE:-4}" \
    --pipeline-parallel-size 1 \
    --limit-mm-per-prompt image=1 \
    --max-model-len 1024 \
    --max-num-seqs 2 \
    --max-num-batched-tokens 1024 \
    --port "${REWARD_PORT:-8080}" > "${REWARD_LOG:-vllm.log}" 2>&1 &

# vllm serve CodeGoat24/UnifiedReward-qwen-7b \
#     --host 0.0.0.0 \
#     --trust-remote-code \
#     --served-model-name UnifiedReward \
#     --gpu-memory-utilization 0.9 \
#     --tensor-parallel-size 4 \
#     --pipeline-parallel-size 1 \
#     --limit-mm-per-prompt image=2 \
#     --port 8080

# vllm serve CodeGoat24/UnifiedReward-7b-v1.5 \
#     --host 0.0.0.0 \
#     --trust-remote-code \
#     --served-model-name UnifiedReward \
#     --gpu-memory-utilization 0.9 \
#     --tensor-parallel-size 4 \
#     --pipeline-parallel-size 1 \
#     --port 8080